{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "ff78977e-ebf6-4bc5-9734-0de20611d5d7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: llama_index in /opt/conda/lib/python3.11/site-packages (0.10.55)\n",
      "Requirement already satisfied: llama-index-llms-huggingface in /opt/conda/lib/python3.11/site-packages (0.2.4)\n",
      "Requirement already satisfied: llama-index-embeddings-huggingface in /opt/conda/lib/python3.11/site-packages (0.2.2)\n",
      "Requirement already satisfied: accelerate in /opt/conda/lib/python3.11/site-packages (0.32.1)\n",
      "Requirement already satisfied: bitsandbytes in /opt/conda/lib/python3.11/site-packages (0.43.1)\n",
      "Requirement already satisfied: llama-index-agent-openai<0.3.0,>=0.1.4 in /opt/conda/lib/python3.11/site-packages (from llama_index) (0.2.8)\n",
      "Requirement already satisfied: llama-index-cli<0.2.0,>=0.1.2 in /opt/conda/lib/python3.11/site-packages (from llama_index) (0.1.12)\n",
      "Requirement already satisfied: llama-index-core==0.10.55 in /opt/conda/lib/python3.11/site-packages (from llama_index) (0.10.55)\n",
      "Requirement already satisfied: llama-index-embeddings-openai<0.2.0,>=0.1.5 in /opt/conda/lib/python3.11/site-packages (from llama_index) (0.1.10)\n",
      "Requirement already satisfied: llama-index-indices-managed-llama-cloud>=0.2.0 in /opt/conda/lib/python3.11/site-packages (from llama_index) (0.2.5)\n",
      "Requirement already satisfied: llama-index-legacy<0.10.0,>=0.9.48 in /opt/conda/lib/python3.11/site-packages (from llama_index) (0.9.48)\n",
      "Requirement already satisfied: llama-index-llms-openai<0.2.0,>=0.1.13 in /opt/conda/lib/python3.11/site-packages (from llama_index) (0.1.25)\n",
      "Requirement already satisfied: llama-index-multi-modal-llms-openai<0.2.0,>=0.1.3 in /opt/conda/lib/python3.11/site-packages (from llama_index) (0.1.7)\n",
      "Requirement already satisfied: llama-index-program-openai<0.2.0,>=0.1.3 in /opt/conda/lib/python3.11/site-packages (from llama_index) (0.1.6)\n",
      "Requirement already satisfied: llama-index-question-gen-openai<0.2.0,>=0.1.2 in /opt/conda/lib/python3.11/site-packages (from llama_index) (0.1.3)\n",
      "Requirement already satisfied: llama-index-readers-file<0.2.0,>=0.1.4 in /opt/conda/lib/python3.11/site-packages (from llama_index) (0.1.30)\n",
      "Requirement already satisfied: llama-index-readers-llama-parse>=0.1.2 in /opt/conda/lib/python3.11/site-packages (from llama_index) (0.1.6)\n",
      "Requirement already satisfied: PyYAML>=6.0.1 in /opt/conda/lib/python3.11/site-packages (from llama-index-core==0.10.55->llama_index) (6.0.1)\n",
      "Requirement already satisfied: SQLAlchemy>=1.4.49 in /opt/conda/lib/python3.11/site-packages (from SQLAlchemy[asyncio]>=1.4.49->llama-index-core==0.10.55->llama_index) (2.0.22)\n",
      "Requirement already satisfied: aiohttp<4.0.0,>=3.8.6 in /opt/conda/lib/python3.11/site-packages (from llama-index-core==0.10.55->llama_index) (3.8.6)\n",
      "Requirement already satisfied: dataclasses-json in /opt/conda/lib/python3.11/site-packages (from llama-index-core==0.10.55->llama_index) (0.6.7)\n",
      "Requirement already satisfied: deprecated>=1.2.9.3 in /opt/conda/lib/python3.11/site-packages (from llama-index-core==0.10.55->llama_index) (1.2.14)\n",
      "Requirement already satisfied: dirtyjson<2.0.0,>=1.0.8 in /opt/conda/lib/python3.11/site-packages (from llama-index-core==0.10.55->llama_index) (1.0.8)\n",
      "Requirement already satisfied: fsspec>=2023.5.0 in /opt/conda/lib/python3.11/site-packages (from llama-index-core==0.10.55->llama_index) (2023.9.2)\n",
      "Requirement already satisfied: httpx in /opt/conda/lib/python3.11/site-packages (from llama-index-core==0.10.55->llama_index) (0.27.0)\n",
      "Requirement already satisfied: nest-asyncio<2.0.0,>=1.5.8 in /opt/conda/lib/python3.11/site-packages (from llama-index-core==0.10.55->llama_index) (1.5.8)\n",
      "Requirement already satisfied: networkx>=3.0 in /opt/conda/lib/python3.11/site-packages (from llama-index-core==0.10.55->llama_index) (3.2)\n",
      "Requirement already satisfied: nltk<4.0.0,>=3.8.1 in /opt/conda/lib/python3.11/site-packages (from llama-index-core==0.10.55->llama_index) (3.8.1)\n",
      "Requirement already satisfied: numpy<2.0.0 in /opt/conda/lib/python3.11/site-packages (from llama-index-core==0.10.55->llama_index) (1.24.4)\n",
      "Requirement already satisfied: openai>=1.1.0 in /opt/conda/lib/python3.11/site-packages (from llama-index-core==0.10.55->llama_index) (1.35.14)\n",
      "Requirement already satisfied: pandas in /opt/conda/lib/python3.11/site-packages (from llama-index-core==0.10.55->llama_index) (2.1.1)\n",
      "Requirement already satisfied: pillow>=9.0.0 in /opt/conda/lib/python3.11/site-packages (from llama-index-core==0.10.55->llama_index) (10.1.0)\n",
      "Requirement already satisfied: requests>=2.31.0 in /opt/conda/lib/python3.11/site-packages (from llama-index-core==0.10.55->llama_index) (2.31.0)\n",
      "Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.2.0 in /opt/conda/lib/python3.11/site-packages (from llama-index-core==0.10.55->llama_index) (8.5.0)\n",
      "Requirement already satisfied: tiktoken>=0.3.3 in /opt/conda/lib/python3.11/site-packages (from llama-index-core==0.10.55->llama_index) (0.7.0)\n",
      "Requirement already satisfied: tqdm<5.0.0,>=4.66.1 in /opt/conda/lib/python3.11/site-packages (from llama-index-core==0.10.55->llama_index) (4.66.1)\n",
      "Requirement already satisfied: typing-extensions>=4.5.0 in /opt/conda/lib/python3.11/site-packages (from llama-index-core==0.10.55->llama_index) (4.8.0)\n",
      "Requirement already satisfied: typing-inspect>=0.8.0 in /opt/conda/lib/python3.11/site-packages (from llama-index-core==0.10.55->llama_index) (0.9.0)\n",
      "Requirement already satisfied: wrapt in /opt/conda/lib/python3.11/site-packages (from llama-index-core==0.10.55->llama_index) (1.16.0)\n",
      "Requirement already satisfied: huggingface-hub<0.24.0,>=0.23.0 in /opt/conda/lib/python3.11/site-packages (from llama-index-llms-huggingface) (0.23.5)\n",
      "Requirement already satisfied: text-generation<0.8.0,>=0.7.0 in /opt/conda/lib/python3.11/site-packages (from llama-index-llms-huggingface) (0.7.0)\n",
      "Requirement already satisfied: torch<3.0.0,>=2.1.2 in /opt/conda/lib/python3.11/site-packages (from llama-index-llms-huggingface) (2.3.1)\n",
      "Requirement already satisfied: transformers<5.0.0,>=4.37.0 in /opt/conda/lib/python3.11/site-packages (from transformers[torch]<5.0.0,>=4.37.0->llama-index-llms-huggingface) (4.42.4)\n",
      "Requirement already satisfied: sentence-transformers>=2.6.1 in /opt/conda/lib/python3.11/site-packages (from llama-index-embeddings-huggingface) (3.0.1)\n",
      "Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.11/site-packages (from accelerate) (23.2)\n",
      "Requirement already satisfied: psutil in /opt/conda/lib/python3.11/site-packages (from accelerate) (5.9.5)\n",
      "Requirement already satisfied: safetensors>=0.3.1 in /opt/conda/lib/python3.11/site-packages (from accelerate) (0.4.3)\n",
      "Requirement already satisfied: filelock in /opt/conda/lib/python3.11/site-packages (from huggingface-hub<0.24.0,>=0.23.0->llama-index-llms-huggingface) (3.15.4)\n",
      "Requirement already satisfied: minijinja>=1.0 in /opt/conda/lib/python3.11/site-packages (from huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface) (2.0.1)\n",
      "Requirement already satisfied: llama-cloud>=0.0.9 in /opt/conda/lib/python3.11/site-packages (from llama-index-indices-managed-llama-cloud>=0.2.0->llama_index) (0.0.9)\n",
      "Requirement already satisfied: beautifulsoup4<5.0.0,>=4.12.3 in /opt/conda/lib/python3.11/site-packages (from llama-index-readers-file<0.2.0,>=0.1.4->llama_index) (4.12.3)\n",
      "Requirement already satisfied: pypdf<5.0.0,>=4.0.1 in /opt/conda/lib/python3.11/site-packages (from llama-index-readers-file<0.2.0,>=0.1.4->llama_index) (4.3.0)\n",
      "Requirement already satisfied: striprtf<0.0.27,>=0.0.26 in /opt/conda/lib/python3.11/site-packages (from llama-index-readers-file<0.2.0,>=0.1.4->llama_index) (0.0.26)\n",
      "Requirement already satisfied: llama-parse>=0.4.0 in /opt/conda/lib/python3.11/site-packages (from llama-index-readers-llama-parse>=0.1.2->llama_index) (0.4.9)\n",
      "Requirement already satisfied: scikit-learn in /opt/conda/lib/python3.11/site-packages (from sentence-transformers>=2.6.1->llama-index-embeddings-huggingface) (1.3.1)\n",
      "Requirement already satisfied: scipy in /opt/conda/lib/python3.11/site-packages (from sentence-transformers>=2.6.1->llama-index-embeddings-huggingface) (1.11.3)\n",
      "Requirement already satisfied: pydantic<3,>2 in /opt/conda/lib/python3.11/site-packages (from text-generation<0.8.0,>=0.7.0->llama-index-llms-huggingface) (2.8.2)\n",
      "Requirement already satisfied: sympy in /opt/conda/lib/python3.11/site-packages (from torch<3.0.0,>=2.1.2->llama-index-llms-huggingface) (1.12)\n",
      "Requirement already satisfied: jinja2 in /opt/conda/lib/python3.11/site-packages (from torch<3.0.0,>=2.1.2->llama-index-llms-huggingface) (3.1.2)\n",
      "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /opt/conda/lib/python3.11/site-packages (from torch<3.0.0,>=2.1.2->llama-index-llms-huggingface) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /opt/conda/lib/python3.11/site-packages (from torch<3.0.0,>=2.1.2->llama-index-llms-huggingface) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /opt/conda/lib/python3.11/site-packages (from torch<3.0.0,>=2.1.2->llama-index-llms-huggingface) (12.1.105)\n",
      "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /opt/conda/lib/python3.11/site-packages (from torch<3.0.0,>=2.1.2->llama-index-llms-huggingface) (8.9.2.26)\n",
      "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /opt/conda/lib/python3.11/site-packages (from torch<3.0.0,>=2.1.2->llama-index-llms-huggingface) (12.1.3.1)\n",
      "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /opt/conda/lib/python3.11/site-packages (from torch<3.0.0,>=2.1.2->llama-index-llms-huggingface) (11.0.2.54)\n",
      "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /opt/conda/lib/python3.11/site-packages (from torch<3.0.0,>=2.1.2->llama-index-llms-huggingface) (10.3.2.106)\n",
      "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /opt/conda/lib/python3.11/site-packages (from torch<3.0.0,>=2.1.2->llama-index-llms-huggingface) (11.4.5.107)\n",
      "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /opt/conda/lib/python3.11/site-packages (from torch<3.0.0,>=2.1.2->llama-index-llms-huggingface) (12.1.0.106)\n",
      "Requirement already satisfied: nvidia-nccl-cu12==2.20.5 in /opt/conda/lib/python3.11/site-packages (from torch<3.0.0,>=2.1.2->llama-index-llms-huggingface) (2.20.5)\n",
      "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /opt/conda/lib/python3.11/site-packages (from torch<3.0.0,>=2.1.2->llama-index-llms-huggingface) (12.1.105)\n",
      "Requirement already satisfied: triton==2.3.1 in /opt/conda/lib/python3.11/site-packages (from torch<3.0.0,>=2.1.2->llama-index-llms-huggingface) (2.3.1)\n",
      "Requirement already satisfied: nvidia-nvjitlink-cu12 in /opt/conda/lib/python3.11/site-packages (from nvidia-cusolver-cu12==11.4.5.107->torch<3.0.0,>=2.1.2->llama-index-llms-huggingface) (12.5.82)\n",
      "Requirement already satisfied: regex!=2019.12.17 in /opt/conda/lib/python3.11/site-packages (from transformers<5.0.0,>=4.37.0->transformers[torch]<5.0.0,>=4.37.0->llama-index-llms-huggingface) (2024.5.15)\n",
      "Requirement already satisfied: tokenizers<0.20,>=0.19 in /opt/conda/lib/python3.11/site-packages (from transformers<5.0.0,>=4.37.0->transformers[torch]<5.0.0,>=4.37.0->llama-index-llms-huggingface) (0.19.1)\n",
      "Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core==0.10.55->llama_index) (23.1.0)\n",
      "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /opt/conda/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core==0.10.55->llama_index) (3.3.0)\n",
      "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core==0.10.55->llama_index) (6.0.4)\n",
      "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /opt/conda/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core==0.10.55->llama_index) (4.0.3)\n",
      "Requirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core==0.10.55->llama_index) (1.9.2)\n",
      "Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core==0.10.55->llama_index) (1.4.0)\n",
      "Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.6->llama-index-core==0.10.55->llama_index) (1.3.1)\n",
      "Requirement already satisfied: soupsieve>1.2 in /opt/conda/lib/python3.11/site-packages (from beautifulsoup4<5.0.0,>=4.12.3->llama-index-readers-file<0.2.0,>=0.1.4->llama_index) (2.5)\n",
      "Requirement already satisfied: anyio in /opt/conda/lib/python3.11/site-packages (from httpx->llama-index-core==0.10.55->llama_index) (4.0.0)\n",
      "Requirement already satisfied: certifi in /opt/conda/lib/python3.11/site-packages (from httpx->llama-index-core==0.10.55->llama_index) (2023.7.22)\n",
      "Requirement already satisfied: httpcore==1.* in /opt/conda/lib/python3.11/site-packages (from httpx->llama-index-core==0.10.55->llama_index) (1.0.5)\n",
      "Requirement already satisfied: idna in /opt/conda/lib/python3.11/site-packages (from httpx->llama-index-core==0.10.55->llama_index) (3.4)\n",
      "Requirement already satisfied: sniffio in /opt/conda/lib/python3.11/site-packages (from httpx->llama-index-core==0.10.55->llama_index) (1.3.0)\n",
      "Requirement already satisfied: h11<0.15,>=0.13 in /opt/conda/lib/python3.11/site-packages (from httpcore==1.*->httpx->llama-index-core==0.10.55->llama_index) (0.14.0)\n",
      "Requirement already satisfied: click in /opt/conda/lib/python3.11/site-packages (from nltk<4.0.0,>=3.8.1->llama-index-core==0.10.55->llama_index) (8.1.7)\n",
      "Requirement already satisfied: joblib in /opt/conda/lib/python3.11/site-packages (from nltk<4.0.0,>=3.8.1->llama-index-core==0.10.55->llama_index) (1.3.2)\n",
      "Requirement already satisfied: distro<2,>=1.7.0 in /opt/conda/lib/python3.11/site-packages (from openai>=1.1.0->llama-index-core==0.10.55->llama_index) (1.9.0)\n",
      "Requirement already satisfied: annotated-types>=0.4.0 in /opt/conda/lib/python3.11/site-packages (from pydantic<3,>2->text-generation<0.8.0,>=0.7.0->llama-index-llms-huggingface) (0.7.0)\n",
      "Requirement already satisfied: pydantic-core==2.20.1 in /opt/conda/lib/python3.11/site-packages (from pydantic<3,>2->text-generation<0.8.0,>=0.7.0->llama-index-llms-huggingface) (2.20.1)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.11/site-packages (from requests>=2.31.0->llama-index-core==0.10.55->llama_index) (2.0.7)\n",
      "Requirement already satisfied: greenlet!=0.4.17 in /opt/conda/lib/python3.11/site-packages (from SQLAlchemy>=1.4.49->SQLAlchemy[asyncio]>=1.4.49->llama-index-core==0.10.55->llama_index) (3.0.0)\n",
      "Requirement already satisfied: mypy-extensions>=0.3.0 in /opt/conda/lib/python3.11/site-packages (from typing-inspect>=0.8.0->llama-index-core==0.10.55->llama_index) (1.0.0)\n",
      "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /opt/conda/lib/python3.11/site-packages (from dataclasses-json->llama-index-core==0.10.55->llama_index) (3.21.3)\n",
      "Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.11/site-packages (from jinja2->torch<3.0.0,>=2.1.2->llama-index-llms-huggingface) (2.1.3)\n",
      "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.11/site-packages (from pandas->llama-index-core==0.10.55->llama_index) (2.8.2)\n",
      "Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.11/site-packages (from pandas->llama-index-core==0.10.55->llama_index) (2023.3.post1)\n",
      "Requirement already satisfied: tzdata>=2022.1 in /opt/conda/lib/python3.11/site-packages (from pandas->llama-index-core==0.10.55->llama_index) (2023.3)\n",
      "Requirement already satisfied: threadpoolctl>=2.0.0 in /opt/conda/lib/python3.11/site-packages (from scikit-learn->sentence-transformers>=2.6.1->llama-index-embeddings-huggingface) (3.2.0)\n",
      "Requirement already satisfied: mpmath>=0.19 in /opt/conda/lib/python3.11/site-packages (from sympy->torch<3.0.0,>=2.1.2->llama-index-llms-huggingface) (1.3.0)\n",
      "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.11/site-packages (from python-dateutil>=2.8.2->pandas->llama-index-core==0.10.55->llama_index) (1.16.0)\n"
     ]
    }
   ],
   "source": [
    "!pip install llama_index llama-index-llms-huggingface llama-index-embeddings-huggingface accelerate bitsandbytes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "79a069a1-b264-468e-ba0f-9c68bd3fb0fc",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/conda/lib/python3.11/site-packages/pydantic/_internal/_fields.py:161: UserWarning: Field \"model_id\" has conflict with protected namespace \"model_\".\n",
      "\n",
      "You may be able to resolve this warning by setting `model_config['protected_namespaces'] = ()`.\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "# setup prompts - specific to StableLM\n",
    "from llama_index.core import PromptTemplate\n",
    "from llama_index.llms.huggingface import HuggingFaceLLM\n",
    "from llama_index.core import Settings\n",
    "import torch\n",
    "from llama_index.core.prompts.prompts import SimpleInputPrompt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "2fad1f18-df60-4955-b4b5-17ff3d250eac",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "38592618fbd349e7bdadc99294a45503",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some parameters are on the meta device device because they were offloaded to the disk and cpu.\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    }
   ],
   "source": [
    "system_prompt = \"You are a Q&A assistant. Your goal is to answer questions as accurately as possible based on the instructions and context provided.\"\n",
    "\n",
    "# This will wrap the default prompts that are internal to llama-index\n",
    "query_wrapper_prompt = SimpleInputPrompt(\"<|USER|>{query_str}<|ASSISTANT|>\")\n",
    "\n",
    "model_name=\"microsoft/phi-2\"\n",
    "model_name=\"microsoft/Phi-3-mini-4k-instruct\"\n",
    "\n",
    "\n",
    "llm = HuggingFaceLLM(\n",
    "    context_window=1024,\n",
    "    max_new_tokens=64,\n",
    "    generate_kwargs={\"temperature\": 0.25, \"do_sample\": True},\n",
    "    system_prompt=system_prompt,\n",
    "    query_wrapper_prompt=query_wrapper_prompt,\n",
    "    tokenizer_name=model_name,\n",
    "    model_name=model_name,\n",
    "    #device_map=\"cuda\",\n",
    "    # uncomment this if using CUDA to reduce memory usage\n",
    "    model_kwargs={\"torch_dtype\": \"auto\"}\n",
    ")\n",
    "\n",
    "from llama_index.embeddings.huggingface import HuggingFaceEmbedding\n",
    "\n",
    "embed_model = HuggingFaceEmbedding(model_name=\"BAAI/bge-small-en-v1.5\")\n",
    "\n",
    "\n",
    "from llama_index.core import Settings\n",
    "Settings.chunk_size = 512\n",
    "Settings.llm = llm\n",
    "Settings.embed_model = embed_model\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "10d1545e-8465-43d6-bdd1-6e72acaf3e7b",
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core import SimpleDirectoryReader\n",
    "\n",
    "reader = SimpleDirectoryReader(\n",
    "    input_files=[\"events.txt\"],\n",
    "    required_exts=[\".txt\"],\n",
    "    num_files_limit=100, # Maximum number of files to load\n",
    "    encoding=\"latin-1\",\n",
    ")\n",
    "documents = reader.load_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "7a76892c-ab41-4b59-9135-638d1e263f4e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "env: OPENAI_API_KEY=<your key>\n"
     ]
    }
   ],
   "source": [
    "%env OPENAI_API_KEY=<your key>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "787e82f9-8078-471b-be67-6603ae6eea12",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<your key>\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "print(os.environ['OPENAI_API_KEY'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "6c0047dd-a90d-4788-9846-d1fbf7269135",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6cd9ba7332a047e68465318c6ed76c2d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Parsing nodes:   0%|          | 0/1 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "922caed2f9424fc08688b38610535e3a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating embeddings:   0%|          | 0/120 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from llama_index.core import VectorStoreIndex\n",
    "\n",
    "# 1. Load VectorStoreIndex directly from Documents\n",
    "index = VectorStoreIndex.from_documents(documents, show_progress=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3f8b8b2e-0496-471d-8ed0-8016e5bfc523",
   "metadata": {},
   "source": [
    "#Load VectorStoreIndex by selecting the splitter(chunk_size, chunk_overlap) and embedded model directly\n",
    "\n",
    "from llama_index.core.node_parser import SentenceSplitter\n",
    "from llama_index.embeddings.openai import OpenAIEmbedding\n",
    "from llama_index.core import VectorStoreIndex\n",
    "\n",
    "node_parser = SentenceSplitter(chunk_size=256, chunk_overlap=20)\n",
    "nodes = node_parser.get_nodes_from_documents(documents)\n",
    "index = VectorStoreIndex(nodes, embed_model=embed_model, show_progress=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "bb2195f7-9ff9-499e-b660-e5a6277f599d",
   "metadata": {},
   "outputs": [],
   "source": [
    "query_engine = index.as_query_engine()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "4ccc0f35-d9ae-4d8f-8168-dcb2acea252f",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "You are not running the flash-attention implementation, expect numerical differences.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Based on the provided context, when the Rotational speed is about 1342 rpm, the anomaly that can be observed is a Heat dissipation failure (HDF). This anomaly occurs when heat dissipation causes a process failure, if the difference between air- and process temperature\n"
     ]
    }
   ],
   "source": [
    "response = query_engine.query(\"When Rotational speed is about 1342 what can be the anomaly?\")\n",
    "print(f\"{response}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f2477ce9-4973-4a40-bf4d-2ef3b8e97568",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
